Packages and Loading

packs<-c("tidyverse","caret","rmarkdown","rgeos",
         "rnaturalearth","rnaturalearthdata","sf",
         "ggspatial","maps","plotly")
sapply(packs,require,character=T)
##         tidyverse             caret         rmarkdown             rgeos 
##              TRUE              TRUE              TRUE              TRUE 
##     rnaturalearth rnaturalearthdata                sf         ggspatial 
##              TRUE              TRUE              TRUE              TRUE 
##              maps            plotly 
##              TRUE              TRUE
df<-read.csv("covid-19-all.csv")

Analysis

Subset only countries in the Continental United States

us<-df[df$Country.Region == "US" & df$Latitude > 24 & df$Latitude < 54 & 
         df$Longitude < -62 & df$Longitude > -126,]

####Free memory by deleting the unused dataframe
rm(df) 
####Create a basic plot of the counties in the US
plot(us$Longitude,us$Latitude)

## Analyze data attributes

####Create a new vector for the ranges of confirmed cases data
casesRange<-list(c(seq(1,49,1)),c(seq(50,99,1)),c(seq(100,199,1)),c(seq(200,499,1)),
              c(seq(500,999,1)),c(seq(1000,1999,1)),c(seq(2000,9999,1)),c(0))

us$ConfirmedRange<-NA
us$ConfirmedRange[us$Confirmed %in% casesRange[[8]]]<-"0"
us$ConfirmedRange[us$Confirmed %in% casesRange[[1]]]<-"1-49"
us$ConfirmedRange[us$Confirmed %in% casesRange[[2]]]<-"50-99"
us$ConfirmedRange[us$Confirmed %in% casesRange[[3]]]<-"100-199"
us$ConfirmedRange[us$Confirmed %in% casesRange[[4]]]<-"200-499"
us$ConfirmedRange[us$Confirmed %in% casesRange[[5]]]<-"500-999"
us$ConfirmedRange[us$Confirmed %in% casesRange[[6]]]<-"1,000-1,999"
us$ConfirmedRange[us$Confirmed %in% casesRange[[7]]]<-"2,000-9,999"
us$ConfirmedRange[us$Confirmed >= 10000]<-"10,000+"
us$ConfirmedRange<-as.factor(us$ConfirmedRange)

levels(us$ConfirmedRange)
## [1] "0"           "1-49"        "1,000-1,999" "10,000+"     "100-199"    
## [6] "2,000-9,999" "200-499"     "50-99"       "500-999"
####Reorder the factor levels from least to greatest
us$ConfirmedRange<-factor(us$ConfirmedRange,levels = levels(us$ConfirmedRange)[c(1,2,8,5,7,9,3,6,4)])

####Plot the confirmed cases by their American Latitude/Longitude
colors<-c(rgb(127,255,0,maxColorValue = 255),rgb(34,139,34,maxColorValue = 255),rgb(0,100,0,maxColorValue = 255),
          rgb(255,255,0,maxColorValue = 255),rgb(255,165,0,maxColorValue = 255),rgb(255,0,0,maxColorValue = 255),
          rgb(139,0,0,maxColorValue = 255),rgb(128,0,128,maxColorValue = 255),rgb(0,0,0,maxColorValue = 255))

world <- ne_countries(scale = "medium", returnclass = "sf")

states <- st_as_sf(map("state", plot = FALSE, fill = TRUE))
states <- cbind(states, st_coordinates(st_centroid(states)))
## Warning in st_centroid.sf(states): st_centroid assumes attributes are constant
## over geometries of x
## Warning in st_centroid.sfc(st_geometry(x), of_largest_polygon =
## of_largest_polygon): st_centroid does not give correct centroids for longitude/
## latitude data
ggplot(data = world)+
  geom_sf()+
  geom_sf(data = states, fill = NA)+
  coord_sf(xlim = c(-126, -70), ylim = c(24, 50), expand = FALSE)+
  geom_point(data = us, shape = 15, aes(x = Longitude, y = Latitude, group = Province.State, 
                           color = ConfirmedRange), size = 1)+
  scale_color_manual(values = colors)+
  labs(x = "Latitude", y = "Longitude", title = "Confirmed Cases by State")+
  theme_void()

## Next, we can look at the number of deaths per county in each state

####We can reuse the code from before changing it to count deaths instead in a new column
us$Deaths[is.na(us$Deaths)]<-0
us$DeathsRange<-NA
us$DeathsRange[us$Deaths %in% casesRange[[8]]]<-"0"
us$DeathsRange[us$Deaths %in% casesRange[[1]]]<-"1-49"
us$DeathsRange[us$Deaths %in% casesRange[[2]]]<-"50-99"
us$DeathsRange[us$Deaths %in% casesRange[[3]]]<-"100-199"
us$DeathsRange[us$Deaths %in% casesRange[[4]]]<-"200-499"
us$DeathsRange[us$Deaths %in% casesRange[[5]]]<-"500-999"
us$DeathsRange[us$Deaths %in% casesRange[[6]]]<-"1,000-1,999"
us$DeathsRange[us$Deaths %in% casesRange[[7]]]<-"2,000-9,999"
us$DeathsRange[us$Deaths >= 10000]<-"10,000+"
us$DeathsRange<-as.factor(us$DeathsRange)

us$DeathsRange<-factor(us$DeathsRange,levels = levels(us$DeathsRange)[c(1,2,8,5,7,9,3,6,4)])

####We can use largely the same plot code from before to plot the deaths
ggplot(data = world)+
  geom_sf()+
  geom_sf(data = states, fill = NA)+
  coord_sf(xlim = c(-126, -70), ylim = c(24, 50), expand = FALSE)+
  geom_point(data = us, shape = 15, aes(x = Longitude, y = Latitude, group = Province.State, 
                                        color = DeathsRange), size = 1)+
  scale_color_manual(values = colors)+
  labs(x = "Latitude", y = "Longitude", title = "Deaths by State")+
  theme_void()

## Going beyond static images, We can also use plotly to generate an interactive plot to show multiple points at once. ## First, we need to create unique IDs for cities that have reported multiple times.

us$Loc<-paste(us$Latitude,us$Longitude)
us$Loc<-as.factor(us$Loc)

####Then we take only the highest number for cumulative confirmed, deaths, and recovered.
us$Date<-as.Date(us$Date)

usg<- us %>%
  arrange(desc(Date)) %>%
  group_by(Loc) %>%
  top_n(1,Date) ####Select only the latest date for each location
  

g <- list(
  scope = 'usa',
  projection = list(type = 'albers usa'),
  showland = TRUE,
  landcolor = toRGB("gray70"),
  subunitcolor = toRGB("gray85"),
  countrycolor = toRGB("gray85"),
  countrywidth = 0.5,
  subunitwidth = 0.5
)

fig <- plot_geo(usg, lat = ~Latitude, lon = ~Longitude,  colors = colors)
fig <- fig %>% add_markers(
  text = ~paste(Province.State,paste("Confirmed",Confirmed),paste("Deaths",Deaths),paste("Recovered",Recovered), sep = "<br />"),
  color = ~ConfirmedRange, 
  symbol = I("square"), 
  size = I(8), 
  hoverinfo = "text",
)
fig <- fig %>% layout(
  title = 'Coronavirus in the United States<br />(Hover for totals)', geo = g
)

##This interactive plotly figure displays the total confirmed cases, deaths, and recovered in the United States.
##The plot can be dragged and zoomed and data can be hidden by severity group.
fig